leftA <- read.table("https://raw.githubusercontent.com/neurodata/graspy/graphmodel/graspy/datasets/drosophila/left_adjacency.csv", header = FALSE, sep = " ")
rightA <- read.table("https://raw.githubusercontent.com/neurodata/graspy/graphmodel/graspy/datasets/drosophila/right_adjacency.csv", header = FALSE, sep = " ")
leftLabels <- read.csv("https://raw.githubusercontent.com/neurodata/graspy/graphmodel/graspy/datasets/drosophila/left_cell_labels.csv", header = FALSE)
rightLabels <- read.csv("https://raw.githubusercontent.com/neurodata/graspy/graphmodel/graspy/datasets/drosophila/right_cell_labels.csv", header = FALSE)X <- as.matrix(rightA)
Xb <- X
Xb[which(Xb > 0)] <- 1
Xase <- irlba(X, nv = 5)
Xl <- Xase$u
Xr <- Xase$v
Xls <- Xase$u / Xase$d
Xrs <- Xase$v / Xase$d
XX <- cbind(Xl, Xr)
XXs <- cbind(Xls, Xrs)
dim(XX)## [1] 213 10
NB: URerF by default normalizes the data before running. This seems to “smooth” the output matrices.
XX.fit <- Urerf(XX , trees = 500, min.parent = 7, normalizeData = TRUE)
XXs.fit <- Urerf(XXs, trees = 500, min.parent = 7, normalizeData = TRUE)
simMa <- XX.fit$sim
dsimMa <- 1 - simMa
simMb <- XXs.fit$sim
dsimMb <- 1 - simMb
### This is the one we use below
simM <- simMa
dsimM <- dsimMa
simMs <- simMb
dsimMs <- dsimMbwrite.table(file = "similarityMatrix.csv", simM, row.names = FALSE, col.names = FALSE, sep = ",")
write.table(file = "dissimilarityMatrix.csv", 1 - simM, row.names = FALSE, col.names = FALSE, sep = ",")
write.table(file = "similarityMatrixScaledSingularVal.csv", simMs, row.names = FALSE, col.names = FALSE, sep = ",")
write.table(file = "dissimilarityMatrixScaledSingularVal.csv", 1 - simMs, row.names = FALSE, col.names = FALSE, sep = ",")## [1] 0.4405913
## [1] 0.4152494
hclust.labels4 <- cutree(h.fit, k = 4)
hclust.labels5 <- cutree(h.fit, k = 5)
hclust.labels6 <- cutree(h.fit, k = 6)## [1] 0.6212183
## [1] 0.6425807
## [1] 0.502164
mi <- 1e4
ARIh <- vector("numeric", mi)
for(i in 1:mi){
set.seed(i)
ARIh[i] <-
mclust::adjustedRandIndex(rightLabels[[1]],
sample(hclust.labels5, replace= FALSE))
}dend5 <- as.dendrogram(h.fit)
dend5 %>% set("labels", hclust.labels5) %>%
color_branches(col = viridis(5)[hclust.labels5]) %>%
sort %>%
plot()